---
title: "Replication File"
output:
  pdf_document: default
  html_document: default
date: "2023-10-20"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE) 
library(dplyr)
library(haven)
library(stargazer)
library(broom)
library(GGally)
library(plm)
library(interplot)
library(ggpubr)
library(lmtest)
library(sandwich)
library(DRDID)
library(gridExtra)
# set working directory
```

## Summary Statistics

```{r}

# load dataset
df <- read_sav("FID_3154.sav")

# subset data to variables of interest and omit NAs

sub_df_1 <- subset(df, select = c(AÑO, MES, NUMENTR, A.1, A.6, A.8.1, B.2, 
                                      B.3, B.4, E.1, G.1, H.3))

sub_df <- subset(sub_df_1, df$A.8.1 != 3 & df$B.2 != 8 & df$B.2 != 9 & df$B.3 != 8 
                   & df$B.3 != 9 & df$B.4 != 8 & df$B.4 != 9 & df$B.3 != 6 & 
                     df$E.1 != 9 & df$H.3 != 8 & df$H.3 != 9)

# rename columns

colnames(sub_df)[4] <- "region"
colnames(sub_df)[5] <- "municipality"
colnames(sub_df)[6] <- "nationality"
colnames(sub_df)[7] <- "regularization"
colnames(sub_df)[8] <- "tolerance"
colnames(sub_df)[9] <- "reception"
colnames(sub_df)[10] <- "education"
colnames(sub_df)[11] <- "ideology"
colnames(sub_df)[12] <- "economy"

# make sure variables are numeric

sub_df <- as.data.frame(sub_df)

# view summary of variables

summary(sub_df)

```

## Statistical Analysis

### Opinions Towards Regularization of Irregular Migrants

```{r}

# group together question 2 and 3 to created more ordinal scale; reverse ordering

sub_df$regularize <- ifelse(sub_df$regularization == 1, 4, 
                        ifelse(sub_df$regularization == 2 | 
                            sub_df$regularization == 3, 3,
                        ifelse(sub_df$regularization == 4, 2, 
                        ifelse(sub_df$regularization == 5, 1, NA))))

# recreate summary statistics for new regularize variable

summary(sub_df)

stargazer(sub_df)

# create time variable: after 2014 = 1

sub_df$time <- ifelse(sub_df$AÑO >= 2014, 1, 0)

# plot pre-treatment IV

hist(sub_df$time)

# create treatment variable: Cataluña = 1

sub_df$treated <- ifelse(sub_df$region == 9, 1, 0)

# plot treatment IV

hist(sub_df$treated)

# rename values

sub_df$Barcelona <- sub_df$treated
sub_df$Post.2014 <- sub_df$time

# run DiD linear model

lm_regularize <- lm(regularize ~ Barcelona + Post.2014 + Barcelona*Post.2014 +
                        nationality + ideology + economy, data = sub_df)

summary(lm_regularize)

# cluster robust standard errors on group

coeftest(lm_regularize, vcov = vcovCL, type = "HC1", cluster = ~Barcelona)

```

### Tolerance for Liberal Irregular Migration Laws

```{r}

# plot DV - excluding those that do not know immigration law

hist(sub_df$tolerance)

# create time variable: after 2012 = 1

sub_df$time <- ifelse(sub_df$AÑO >= 2012, 1, 0)

# plot pre-treatment IV

hist(sub_df$time)

# create treatment variable: Barcelona = 1

sub_df$treated <- ifelse(sub_df$region == 9, 1, 0)

# plot treatment IV

hist(sub_df$treated)

# rename variables

sub_df$Barcelona <- sub_df$treated
sub_df$Post.2012 <- sub_df$time

# run DiD linear model

lm_tolerance <- lm(tolerance ~ Barcelona + Post.2012 + Barcelona*Post.2012 + nationality +
             ideology + economy, data = sub_df)

# cluster robust standard errors on group

coeftest(lm_tolerance, vcov = vcovCL, type = "HC1", cluster = ~Barcelona)

ggcoef(lm_tolerance, exclude_intercept = TRUE)

summary(lm_tolerance)

# create linear model table with both models

stargazer(lm_regularize, lm_tolerance)

# create combined marginal effects plots

marg_reg1 <- interplot(m = lm_regularize, var1 = "Post.2014", var2 = "Barcelona") +
  xlab("Not Barcelona versus Barcelona Respondents") +
  ylab("Marginal Effect of Regularization of Migrants") +
  theme_bw() + theme(panel.border = element_blank(), panel.grid.major = 
                       element_blank(),
                     panel.grid.minor = element_blank(), 
                     axis.line = element_line(colour = "black")) +
                       geom_point(position = position_dodge(width = 0.9))

marg_tol1 <- interplot(m = lm_tolerance, var1 = "Post.2012", var2 = "Barcelona") +
  xlab("Not Barcelona versus Barcelona Respondents") +
  ylab("Marginal Effect of Tolerance of Migration Policy") +
  theme_bw() + theme(panel.border = element_blank(), panel.grid.major = 
                       element_blank(),
                     panel.grid.minor = element_blank(), 
                     axis.line = element_line(colour = "black")) +
                       geom_point(position = position_dodge(width = 0.9))

plot_final <- grid.arrange(marg_reg1, marg_tol1, ncol=2)

ggsave("margplots.png", plot_final, scale = .8, 
       width = 8.09, height = 5, dpi = 300, 
       units = "in")

```

## Robustness Checks

### Parallel trends assumption pre-test

```{r}
# Doubly-robust DiD estimator by Sant'Anna and Zhao 2020

# dataset with just covarietes

sub_cov <- subset(sub_df, select = c("nationality", "economy", "ideology"))

# remove NAs

sub_df <- na.omit(sub_df)

out <- drdid_imp_rc(y = sub_df$regularize, 
             post = sub_df$Post.2014, 
             D = sub_df$Barcelona,
             covariates = sub_cov)

summary(out)


out2 <- drdid_imp_rc(y = sub_df$tolerance, post = sub_df$Post.2012, 
              D = sub_df$Barcelona, 
              covariates = sub_cov)

summary(out2)

```

### Other Cut Off Points

```{r}

# run with different cut offs

# regularize

# create time variable: after 2009 = 1

sub_df$time <- ifelse(sub_df$AÑO >= 2009, 1, 0)

# plot pre-treatment IV

hist(sub_df$time)

# create treatment variable: Cataluña = 1

sub_df$treated <- ifelse(sub_df$region == 9, 1, 0)

# plot treatment IV

hist(sub_df$treated)

# rename values

sub_df$Barcelona <- sub_df$treated
sub_df$Post.2009 <- sub_df$time

# run DiD linear model

lm_regularize09 <- lm(regularize ~ Barcelona + Post.2009 + Barcelona*Post.2009 +
                        nationality + ideology + economy, data = sub_df)

summary(lm_regularize09)

# tolerance - 2010

# create time variable: after 2010 = 1

sub_df$time <- ifelse(sub_df$AÑO >= 2010, 1, 0)

# plot pre-treatment IV

hist(sub_df$time)

# create treatment variable: Barcelona = 1

sub_df$treated <- ifelse(sub_df$region == 9, 1, 0)

# plot treatment IV

hist(sub_df$treated)

# rename variables

sub_df$Barcelona <- sub_df$treated
sub_df$Post.2010 <- sub_df$time

# run DiD linear model

lm_tolerance10 <- lm(tolerance ~ Barcelona + Post.2010 + Barcelona*Post.2010 + nationality +
             ideology + economy, data = sub_df)

summary(lm_tolerance10)

stargazer(lm_regularize09, lm_tolerance10)

```

```{r}

# plot average treatment effect over time - regularize

treated_08 <- mean(sub_df$regularize[sub_df$AÑO == 2008 & sub_df$treated == 1])

untreated_08 <- mean(sub_df$regularize[sub_df$AÑO == 2008 & sub_df$treated == 0])

treated_09 <- mean(sub_df$regularize[sub_df$AÑO == 2009 & sub_df$treated == 1])

untreated_09 <- mean(sub_df$regularize[sub_df$AÑO == 2009 & sub_df$treated == 0])

treated_10 <- mean(sub_df$regularize[sub_df$AÑO == 2010 & sub_df$treated == 1])

untreated_10 <- mean(sub_df$regularize[sub_df$AÑO == 2010 & sub_df$treated == 0])

treated_11 <- mean(sub_df$regularize[sub_df$AÑO == 2011 & sub_df$treated == 1])

untreated_11 <- mean(sub_df$regularize[sub_df$AÑO == 2011 & sub_df$treated == 0])

treated_12 <- mean(sub_df$regularize[sub_df$AÑO == 2012 & sub_df$treated == 1])

untreated_12 <- mean(sub_df$regularize[sub_df$AÑO == 2012 & sub_df$treated == 0])

treated_14 <- mean(sub_df$regularize[sub_df$AÑO == 2014 & sub_df$treated == 1])

untreated_14 <- mean(sub_df$regularize[sub_df$AÑO == 2014 & sub_df$treated == 0])

treated_15 <- mean(sub_df$regularize[sub_df$AÑO == 2015 & sub_df$treated == 1])

untreated_15 <- mean(sub_df$regularize[sub_df$AÑO == 2015 & sub_df$treated == 0])

treated_16 <- mean(sub_df$regularize[sub_df$AÑO == 2016 & sub_df$treated == 1])

untreated_16 <- mean(sub_df$regularize[sub_df$AÑO == 2016 & sub_df$treated == 0])

treated_17 <- mean(sub_df$regularize[sub_df$AÑO == 2017 & sub_df$treated == 1])

untreated_17 <- mean(sub_df$regularize[sub_df$AÑO == 2017 & sub_df$treated == 0])

new_data_2 <- data.frame(c(2008, 2009, 2010, 2011, 2012, 2014, 2015, 2016, 2017), c(treated_08, treated_09, treated_10, treated_11, treated_12, treated_14, treated_15, treated_16, treated_17), c(untreated_08, untreated_09, untreated_10, untreated_11, untreated_12, untreated_14, untreated_15, untreated_16, untreated_17))

colnames(new_data_2) <- c("year", "Barcelona", "Not Barcelona")

library(ggplot2)

ggplot(new_data_2, mapping = aes(year)) +
  geom_line(aes(y = `Not Barcelona`, colour = "Not Barcelona")) +
  geom_line(aes(y = Barcelona, colour = "Barcelona")) +
  geom_vline(xintercept=2014, linetype = "dotted", color = "black", size = 1.5) +
  scale_x_continuous(breaks = c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017)) +
  labs(x = "Year", y = "Attitudes Towards Regularization", color=NULL)

ggsave("regularize_plot1.png", scale = .8, 
       width = 8.09, height = 5, dpi = 300, 
       units = "in")

```

```{r}

# plot average treatment effect over time - tolerance

treated_08 <- mean(sub_df$tolerance[sub_df$AÑO == 2008 & sub_df$treated == 1])

untreated_08 <- mean(sub_df$tolerance[sub_df$AÑO == 2008 & sub_df$treated == 0])

treated_09 <- mean(sub_df$tolerance[sub_df$AÑO == 2009 & sub_df$treated == 1])

untreated_09 <- mean(sub_df$tolerance[sub_df$AÑO == 2009 & sub_df$treated == 0])

treated_10 <- mean(sub_df$tolerance[sub_df$AÑO == 2010 & sub_df$treated == 1])

untreated_10 <- mean(sub_df$tolerance[sub_df$AÑO == 2010 & sub_df$treated == 0])

treated_11 <- mean(sub_df$tolerance[sub_df$AÑO == 2011 & sub_df$treated == 1])

untreated_11 <- mean(sub_df$tolerance[sub_df$AÑO == 2011 & sub_df$treated == 0])

treated_12 <- mean(sub_df$tolerance[sub_df$AÑO == 2012 & sub_df$treated == 1])

untreated_12 <- mean(sub_df$tolerance[sub_df$AÑO == 2012 & sub_df$treated == 0])

treated_14 <- mean(sub_df$tolerance[sub_df$AÑO == 2014 & sub_df$treated == 1])

untreated_14 <- mean(sub_df$tolerance[sub_df$AÑO == 2014 & sub_df$treated == 0])

treated_15 <- mean(sub_df$tolerance[sub_df$AÑO == 2015 & sub_df$treated == 1])

untreated_15 <- mean(sub_df$tolerance[sub_df$AÑO == 2015 & sub_df$treated == 0])

treated_16 <- mean(sub_df$tolerance[sub_df$AÑO == 2016 & sub_df$treated == 1])

untreated_16 <- mean(sub_df$tolerance[sub_df$AÑO == 2016 & sub_df$treated == 0])

treated_17 <- mean(sub_df$tolerance[sub_df$AÑO == 2017 & sub_df$treated == 1])

untreated_17 <- mean(sub_df$tolerance[sub_df$AÑO == 2017 & sub_df$treated == 0])

new_data_3 <- data.frame(c(2008, 2009, 2010, 2011, 2012, 2014, 2015, 2016, 2017), c(treated_08, treated_09, treated_10, treated_11, treated_12, treated_14, treated_15, treated_16, treated_17), c(untreated_08, untreated_09, untreated_10, untreated_11, untreated_12, untreated_14, untreated_15, untreated_16, untreated_17))

colnames(new_data_3) <- c("year", "Barcelona", "Not Barcelona")

ggplot(new_data_3, mapping = aes(year)) +
  geom_line(aes(y = `Not Barcelona`, colour = "Not Barcelona")) +
  geom_line(aes(y = Barcelona, colour = "Barcelona")) +
  geom_vline(xintercept=2012, linetype = "dotted", color = "black", size = 1.5) +
  scale_x_continuous(breaks = c(2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017)) +
  labs(x = "Year", y = "Tolerance of Migration Policy", color=NULL)

ggsave("tolerance_plot2.png", scale = .8, 
       width = 8.09, height = 5, dpi = 300, 
       units = "in")

```

### Placebo test 

```{r}

# conduct placebo test
# conduct additional DiD estimation using "fake" treatment group
# select comparable group but one that is not affected by the treatment (policy)

# create fake treatment variable: Sevilla = 1

sub_df$Sevilla <- ifelse(sub_df$region == 1, 1, 0)
sub_df$Sevilla <- as.numeric(sub_df$Sevilla)
sum(sub_df$Sevilla == "1")

sub_df_nob <- subset(sub_df, sub_df$region != 9)

lm_regularize_p1 <- lm(regularize ~ Sevilla + Post.2014 + Sevilla*Post.2014 +
                        nationality + ideology + economy, data = sub_df_nob)

summary(lm_regularize_p1)

lm_tolerance_p1 <- lm(tolerance ~ Sevilla + Post.2012 + Sevilla*Post.2012 + nationality +
             ideology + economy, data = sub_df_nob)

summary(lm_tolerance_p1)

# create fake treatment variable: Valencia = 1

sub_df_nob$Valencia <- ifelse(sub_df_nob$region == 10, 1, 0)
sub_df_nob$Valencia <- as.numeric(sub_df_nob$Valencia)
sum(sub_df_nob$Valencia)

lm_regularize_p2 <- lm(regularize ~ Valencia + Post.2014 + Valencia*Post.2014 +
                        nationality + ideology + economy, data = sub_df_nob)

summary(lm_regularize_p2)

lm_tolerance_p2 <- lm(tolerance ~ Valencia + Post.2012 + Valencia*Post.2012 + nationality +
             ideology + economy, data = sub_df_nob)

summary(lm_tolerance_p2)

stargazer(lm_regularize_p1, lm_tolerance_p1, lm_regularize_p2, lm_tolerance_p2)

```